******************************
* SET OPTIONS; SET LOCATIONS *
******************************
* Start from a clean session and keep the console quiet
clear all
set more off
set trace off

* Root folders used to build file paths further down.
* external_dir: CHANGE THIS TO THE DIRECTORY THAT HAS THE FILES
global external_dir "D:\Dropbox\Michigan\RDC Project\AEJ Data Set\"
* internal_dir: this is an internal RDC directory
global internal_dir "\projects\"

* Close a log left open by an earlier run; capture ignores the error when none is open
capture log close


*********
* Setup *
*********

*Delete any previously cleaned files.
	*erase needs the complete file name, so the .dta extension is spelled out;
	*without it the per-state intermediate files are never matched and the
	*capture prefix hides the failure.
forval i = 1(1)51 {
	cap erase "$internal_dir/data/Census 2000/census2000_`i'.dta"
	}
*Output names that this script does not write (presumably from an earlier
	*version); still removed so that old copies are cleaned up
cap erase "$internal_dir/data/Census 2000/census2000_age_migs.dta"
cap erase "$internal_dir/data/Census 2000/census2000est_disagg.dta"
*Outputs saved at the end of this script. They are written into the Census 2000
	*data folder, which appears to be the same folder that is listed below
	*(TODO confirm the two path spellings resolve to one directory); a copy left
	*from an earlier run would then be picked up as if it were a raw state file.
cap erase "$internal_dir/data/Census 2000/census2000est_flows.dta"
cap erase "$internal_dir/data/Census 2000/census2000_flows_by_age.dta"

*This directory contains all of the original "Personal" Census 2000 files 
	*(by state), which have been converted to .dta format using StatTransfer.
	*The list of file names is stored in the local macro files.
local files : dir "/projects/data/Census 2000" files "*.dta"

*Work from inside the data directory so the bare file names in the list can be opened
cd "/projects/data/Census 2000"


********************
* Procure raw data *
********************

*Read in each state's "Personal" Census 2000 file, keeping only immigrants 
	*and necessary variables. Each file is reduced and saved as a numbered
	*intermediate file census2000_#.dta; the counter i ends one past the
	*number of files processed.
local i = 1
foreach file in `files' {
	cap erase "$internal_dir/data/Census 2000/census2000_`i'.dta"	/*save below has no replace option, so clear the target first*/
	display "`file'"
	use "`file'", clear
	qui drop if qcitizen=="3" 			/*Keep only immigrants (drops citizenship code 3)*/
	qui keep pwt qyr2us qpobst qcitizen qhigh qage 	/*Keep only necessary variables*/
	qui destring qpobst, replace
	qui destring qyr2us, replace
	qui keep if qpobst>=60 & qpobst<=528 		/*Keep only immigrants (place-of-birth codes 60 through 528)*/
	*Full variable names are used so the renames do not depend on
		*variable-name abbreviation being switched on
	rename qyr2us yoe
	rename qpobst pob
	gen st = substr("`file'",1,2)			/*First two characters of the file name; presumably the state code*/
	qui save "$internal_dir/data/Census 2000/census2000_`i'"
	local i = `i'+1
	}
display "`i'"
clear 


************************
* Append and Harmonize *
************************

*Stack the numbered per-state extracts into one dataset of all immigrant
	*observations from the Census 2000 files, deleting each extract once it
	*has been appended. The counter i is one past the last extract written.
local end = `i'-1
forval i = 1/`end' {
	append using "$internal_dir/data/Census 2000/census2000_`i'.dta"
	erase "$internal_dir/data/Census 2000/census2000_`i'.dta"
	}

*Constant equal to one; summing it gives the observation count of each regression cell
gen sample = 1

*Place-of-birth harmonization

*Netherlands Antilles: codes 344 through 348 are folded into 336
replace pob = 336 if inrange(pob, 344, 348)

*"Korea" --> South Korea
replace pob = 220 if pob==217

*St. Barts --> Guadeloupe
replace pob = 331 if pob==337


**********************	
* Relevant Estimates *
**********************

*Yearly inflow estimates: sum of pwt within each yoe-by-pob cell
*Counts for supporting documentation: sum of sample within the same cells
*The full microdata are restored afterwards so they can be collapsed again
preserve
	*Sum and name the output variables in one step
	collapse (sum) sample2000=sample flows2000=pwt, by(yoe pob)
	rename pob country
	rename yoe year
	drop if year==2000	/*2000 incomplete since survey in April*/
	save "$internal_dir/data/Census 2000/census2000est_flows.dta", replace
restore

*Yearly inflow estimates by age: sum of pwt within each yoe-by-pob-by-age_at_mig cell
*Counts for supporting documentation: sum of sample within the same cells
*NOTE(review): unlike the flows file, year 2000 is not dropped here - confirm this is intended
preserve
	*Age at migration is current age minus years since entry
	*NOTE(review): qage is never destringed in this script, so this assumes it
		*is already numeric in the raw files - TODO confirm
	gen age_at_mig = qage-(2000-yoe)
	*Sum within cells, naming the summed weight migs directly
	collapse (sum) sample migs=pwt, by(yoe pob age_at_mig) fast
	rename pob country
	rename yoe year
	save "$internal_dir/data/Census 2000/census2000_flows_by_age.dta", replace
restore
